#' ---
#' title: "Regression and Other Stories: SimpleCausal"
#' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
#' date: "`r format(Sys.Date())`"
#' output:
#'   html_document:
#'     theme: readable
#'     toc: true
#'     toc_depth: 2
#'     toc_float: true
#'     code_download: true
#' ---

#' Simple graphs illustrating regression for causal inference. See
#' Chapter 1 in Regression and Other Stories.
#'
#' The simulated data depends on the random seed, and thus the plots
#' and numbers here and in the book may differ. You can experiment
#' with the simulation variation by changing the seed.
#' 
#' -------------
#' 

#+ setup, include=FALSE
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE

#' #### Load packages
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("arm")
# for reproducibility of simulated data
SEED <- 1151

#' ## Simulated data from linear model
set.seed(SEED)
N <- 50
x <- runif(N, 1, 5)
y <- rnorm(N, 10 + 3*x, 3)
x_binary <- ifelse(x<3, 0, 1)
data <- data.frame(N, x, y, x_binary)

#' #### Regression with binary predictor
lm_1a <- lm(y ~ x_binary, data = data)
display(lm_1a)

#' #### Plots
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("SimpleCausal/figs","overview_1a.pdf"), height=3.5, width=4.5)
#+
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x_binary, y, xlab="", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Regression with binary treatment", cex.main=.9, xaxt="n", cex.lab=.9, cex.axis=.9)
axis(1, c(0,1), c("    Control", "Treatment    "), cex.axis=.9)
abline(coef(lm_1a)[1], coef(lm_1a)[2])
text(0.3, 13, paste("Estimated treatment effect is\nslope of fitted line: ", fround(coef(lm_1a)[2], 1)), cex=.8, adj=0)
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' #### Regression with continuous predictor
lm_1b <- lm(y ~ x, data = data)
display(lm_1b)

#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("SimpleCausal/figs","overview_1b.pdf"), height=3.5, width=4.5)
#+
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, y, xlab="Treatment level", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Regression with continuous treatment", cex.main=.9, cex.lab=.9, cex.axis=.9)
abline(coef(lm_1b)[1], coef(lm_1b)[2])
text(3.2, 15, paste("Estimated treatment\neffect per unit of x is\nslope of fitted line: ", fround(coef(lm_1b)[2], 1)), cex=.8, adj=0)
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' ## Simulated data from nonlinear model
set.seed(SEED)
y <- rnorm(N, 5 + 30*exp(-x), 2)
data$y <- y

#' #### Classical regression with continuous predictor
lm_2a <- lm(y ~ x, data = data)
display(lm_2a)

#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("SimpleCausal/figs","overview_2a.pdf"), height=3.5, width=4.5)
#+
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, y, xlab="Treatment level", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Nonlinear treatment effect", cex.main=.9, cex.lab=.9, cex.axis=.9)
curve(5 + 30*exp(-x), add=TRUE)
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("SimpleCausal/figs","overview_2b.pdf"), height=3.5, width=4.5)
#+
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, y, xlab="Treatment level", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Nonlinear effect, estimated with straight line fit", cex.main=.9, cex.lab=.9, cex.axis=.9)
abline(coef(lm_2a)[1], coef(lm_2a)[2])
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' ## Simulated data from two groups
set.seed(SEED)
N <- 100
xx <- rnorm(N, 0, 1)^2
z <- rep(0:1, N/2)
xx <- ifelse(z==0, rnorm(N, 0, 1.2)^2, rnorm(N, 0, .8)^2)
yy <- rnorm(N, 20 + 5*xx + 10*z, 3)
data <- data.frame(xx, z, yy)
lm_2 <- lm(yy ~ xx + z, data=data)
display(lm_2)

#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("SimpleCausal/figs","overview_3.pdf"), height=3.5, width=4.5)
#+
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(xx, yy, xlab="Pre-treatment predictor", ylab="Outcome measurement", bty="l", main="Continuous pre-treatment predictor and binary treatment    ", cex.main=.9, cex.lab=.9, cex.axis=.9, type="n")
points(xx[z==0], yy[z==0], pch=20, cex=.5)
points(xx[z==1], yy[z==1], pch=1, cex=1)
abline(coef(lm_2)[1], coef(lm_2)[2])
abline(coef(lm_2)[1] + coef(lm_2)[3], coef(lm_2)[2])
text(2.3, 29.5, "Controls", cex=.9, adj=0)
text(1.5, 45, "Treated", cex=.9, adj=0)
x0 <- 5.2
arrows(x0, coef(lm_2)[1] + coef(lm_2)[2]*x0, x0, coef(lm_2)[1] + coef(lm_2)[2]*x0 + coef(lm_2)[3], length=.1, code=3)
text(x0+.15, 1 + coef(lm_2)[1] + coef(lm_2)[2]*x0 + .5*coef(lm_2)[3], paste("Estimated\ntreatment\neffect is", fround(coef(lm_2)[3], 1)), cex=.8, adj=0)
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

for (j in 0:1) print(mean(yy[z==j]))
for (j in 0:1) print(mean(xx[z==j]))

